from lxml import html
import requests
import xlwt

# Create the output workbook with a single sheet for the scraped books.
book = xlwt.Workbook(encoding="utf-8")
sheet = book.add_sheet(sheetname="豆瓣图书TOP250")

# Row 0 holds the column headers; a heading's position in the tuple is the
# column index it is written to.
for column, heading in enumerate(
    (
        "title",
        "imageUrl",
        "author",
        "press",
        "date",
        "price",
        "comment",
        "rate",
        "quote",
        "translator",
    )
):
    sheet.write(0, column, heading)
# Alias the etree module exposed through lxml.html; it is used to parse pages.
etree = html.etree

# Headers sent with every page request (a desktop Chrome user-agent string).
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.114 Safari/537.36"
    )
}

# One URL per result page: start offsets 0, 25, ..., 225.
page_url_template = "https://book.douban.com/top250?start={}"
urls = [page_url_template.format(offset) for offset in range(0, 250, 25)]

# NOTE(review): bookList is not referenced by the code visible in this file.
bookList = []

# Number of data rows written so far; also the sheet row index of the last one.
n = 0
def _first(nodes, default="null"):
    """Return the first item of an XPath result list, or *default* if it is empty."""
    return nodes[0] if nodes else default


def _first_non_blank(texts, default="null"):
    """Return the first entry of *texts* that is non-empty after stripping.

    The stripped value is returned. *default* is returned when the list is
    empty or every entry consists only of whitespace.
    """
    for text in texts:
        stripped = text.strip()
        if stripped:
            return stripped
    return default


# Scrape every result page and write one sheet row per book.
#
# Fields are extracted per <tr class="item"> row with XPaths relative to that
# row, so a book that lacks a field (e.g. a quote) gets the "null" placeholder
# instead of shifting the values of all following books.
for url in urls:
    # A timeout keeps a stalled connection from hanging the script; an HTTP
    # error status fails loudly instead of being parsed as an empty page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = "utf-8"
    selector = etree.HTML(response.text)

    # Each matching row describes one book on the page.
    for info in selector.xpath('//tr[@class="item"]'):
        # The leading "." anchors each query at the current row.
        title = _first(info.xpath('.//td/div/a/@title'))
        image_url = _first(info.xpath('.//td/a/img/@src'))
        # Drop whitespace-only text nodes (spaces and line breaks) and keep
        # the first real line of book details.
        book_info = _first_non_blank(info.xpath('.//td/p/text()'))
        comment = _first(info.xpath('.//td/div/span[3]/text()'))
        rate = _first(info.xpath('.//td/div/span[2]/text()'))
        quote = _first(info.xpath('.//td/p/span/text()'))

        # The details line is "/"-separated. Four parts means there is no
        # translator, five parts means the translator follows the author.
        # Any other shape is not interpreted.
        parts = [part.strip() for part in book_info.split("/")]
        if len(parts) == 4:
            author, press, date, price = parts
            translator = "null"
        elif len(parts) == 5:
            author, translator, press, date, price = parts
        else:
            author = translator = press = date = price = "null"

        data = {
            "title": title,
            "imageUrl": image_url,
            "author": author,
            "translator": translator,
            "press": press,
            "date": date,
            "price": price,
            "comment": comment,
            "rate": rate,
            "quote": quote,
        }

        # Row 0 is the header row, so data rows start at 1.
        n += 1
        sheet.write(n, 0, title)
        sheet.write(n, 1, image_url)
        sheet.write(n, 2, author)
        sheet.write(n, 3, press)
        sheet.write(n, 4, date)
        sheet.write(n, 5, price)
        sheet.write(n, 6, comment)
        sheet.write(n, 7, rate)
        sheet.write(n, 8, quote)
        sheet.write(n, 9, translator)
        print(data)
# Write the workbook to disk; the relative path resolves against the current
# working directory.
book.save("豆瓣图书TOP250.xls")

# Report how many book rows were written to the sheet.
print(n)
